#!/usr/bin/env bash

# nodefailresume script

# slurmctld starts this script with an environment that does not include
# USER etc., so the values needed below are set up here.
USER=$(whoami)
PATH="/usr/local/bin:$PATH"
export USER PATH
# Logfile for suspend/resume actions
# N.B.: Make sure this file is writable by user slurm
export LOGFILE=/var/log/slurm/nodefailresume.log
# Make sure the LOGFILE exists and has the expected owner and mode.
# These steps are best effort: a failure is reported on stderr by the
# respective tool and the script carries on.
touch "$LOGFILE"
chown slurm: "$LOGFILE"
chmod 644 "$LOGFILE"

# Timestamp for the log entry, e.g. "Jan 05 13:37:00".
# Assignment and export are kept separate so that the exit status of
# date is not masked by export.
DATE=$(date +"%b %d %T")
export DATE
# "$*" joins all arguments into a single space-separated string, so the
# log entry is always exactly one line built from one word.
printf '%s\n' "$DATE nodefailresume nodes $*" >> "$LOGFILE"

# Recipient of the failure notification.
# NOTE: "<sysadmin-email>" is a placeholder; replace it with a real address.
# The value must stay quoted: an unquoted < or > is parsed by the shell as a
# redirection and makes the whole script fail with a syntax error.
slurm_notify="<sysadmin-email>"
my_mail=/usr/bin/mailx

# Notify Slurm administrator of failed node resume.
# Arguments: the node names (or node list expressions) that failed to resume.
# Sends the output of "sinfo -lRN" as the mail body to $slurm_notify.
# "$*" (not "$@") is used inside the subject so that several arguments are
# joined into ONE subject string; with "$@" the extra words would be passed
# to the mailer as separate arguments, i.e. as additional recipients.
notify_resume_failure() {
  sinfo -lRN | "$my_mail" -s "Nodes $* failed to resume" "$slurm_notify"
}

notify_resume_failure "$@"
